# Violin and box plots of AMR gene counts per group, with a Wilcoxon test and a normality check

library(ggplot2)
library(ggpubr)
library(dplyr)

# Number of AMR genes found in each genome (data from SupplDataS1).
# NOTE(review): the file has a .tsv extension but is parsed as
# space-separated -- confirm that this matches the file on disk.
amr <- read.table("./ab_namr.tsv", sep = " ", header = TRUE)

# Tab-separated table that provides the `Groups` column used further down.
groups <- read.table("./groups.tsv", sep = "\t", header = TRUE)

# Join both tables on their first column. With merge()'s defaults only keys
# present in both tables are kept.
merge_box <- merge(amr, groups, by = 1)

# Rows per group; rendered as an "n = ..." label on the plot below.
g_couunt <- merge_box %>%
  group_by(Groups) %>%
  tally()

# Violin plot of X1 per group with a narrow box plot overlaid.
ggplot(merge_box, aes(x = Groups, y = X1, fill = Groups)) +
  geom_violin(width = 0.5, adjust = 1.25, show.legend = FALSE) +
  geom_boxplot(
    width = 0.12,
    colour = "black",
    outliers = FALSE,
    show.legend = FALSE
  ) +
  xlab("") +
  ylab("No. of AMR genes") +
  scale_fill_manual(values = c("#EE8262", "#BFEFFF")) +
  theme_minimal() +
  theme(
    axis.title = element_text(size = 13),
    axis.text = element_text(size = 13)
  ) +
  # Comparison of the two groups, labelled with significance symbols.
  stat_compare_means(
    comparisons = list(c("Group 1", "Group 2")),
    label = "p.signif"
  ) +
  # Sample-size labels anchored at the top of the panel (y = Inf) and
  # shifted into place with vjust / hjust.
  geom_text(
    data = g_couunt,
    aes(x = Groups, y = Inf, label = paste0("n = ", n)),
    vjust = 5,
    hjust = -0.5,
    size = 4
  )

# Two-sample Wilcoxon test comparing the X1 values (number of AMR genes) of
# the rows labelled "Group 1" against those labelled "Group 2".
# The result is only stored in `wt`; nothing is printed at this point.
wt <- wilcox.test(merge_box[merge_box$Groups == "Group 1",]$X1, merge_box[merge_box$Groups == "Group 2",]$X1)

# Check normality
# shapiro.test() accepts at most 5000 observations, so the test is run on a
# random subsample of rows. The sample size is capped at the number of rows
# available; otherwise sample() stops with an error whenever the merged table
# has fewer than 5000 rows.
# The subsample is drawn without a fixed seed, so results vary between runs.
sampling <- merge_box[
  sample(nrow(merge_box), min(5000, nrow(merge_box))),
]
shapiro.test(sampling$X1)
ggdensity(sampling$X1,
          main = "Density plot of AMR genes",
          xlab = "No. AMR genes")
